#!/usr/bin/env python
# -*- coding:utf-8 -*-
# @Time      :2021/3/25 8:49
# @Author    :cjw
import re
from urllib.parse import urljoin

import requests

# Site root; also the base against which links found on the index page are resolved.
BASE_URL = 'https://www.dy2018.com/'
HEADERS = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.90 Safari/537.36'
}

# The "2020必看热片" heading followed by the first <ul>...</ul> after it.
SECTION_PATTERN = re.compile(r'2020必看热片.*?<ul>(?P<li>.*?</ul>)', re.S)
# Single-quoted href of every anchor inside that list.
HREF_PATTERN = re.compile(r"<a href='(?P<url>.*?)'", re.S)
# Movie title line plus the href of the anchor in the download table cell.
MOVIE_PATTERN = re.compile(r'◎片　　名(?P<movie>.*?)<br />.*?<td style="WORD-WRAP: break-word" '
                           r'bgcolor="#fdfddf"><a href="(?P<download>.*?)">', re.S)


def extract_child_urls(index_page, base_url):
    """Return the absolute detail-page URLs listed under "2020必看热片".

    Args:
        index_page: Decoded HTML text of the index page.
        base_url: URL the hrefs are resolved against.

    Returns:
        A list of absolute URLs in page order. The list is empty when the
        section (or any link inside it) cannot be found, instead of raising
        ``IndexError``.
    """
    sections = [match.group('li') for match in SECTION_PATTERN.finditer(index_page)]
    if not sections:
        return []
    # When the heading text occurs several times, the last occurrence is the
    # one that is used.
    hrefs = (match.group('url') for match in HREF_PATTERN.finditer(sections[-1]))
    # urljoin resolves relative hrefs against base_url and leaves absolute
    # hrefs untouched; plain concatenation would corrupt the latter.
    return [urljoin(base_url, href) for href in hrefs]


def extract_downloads(page_text):
    """Return ``(title, download_link)`` pairs found in a detail page.

    Args:
        page_text: Decoded HTML text of a movie detail page.

    Returns:
        A list of tuples; the title has surrounding whitespace stripped.
        The list is empty when the page does not match the expected layout.
    """
    return [
        (match.group('movie').strip(), match.group('download'))
        for match in MOVIE_PATTERN.finditer(page_text)
    ]


def fetch_page(page_url):
    """Download *page_url* and return its body decoded as gb2312.

    The response is used as a context manager so the connection is released
    even when reading or decoding fails.
    """
    # NOTE(review): verify=False disables TLS certificate verification, as in
    # the original script; requests are therefore open to interception.
    with requests.get(page_url, headers=HEADERS, verify=False, timeout=60) as resp:
        resp.encoding = 'gb2312'
        return resp.text


def main():
    """Print the title and download link of every movie in the section.

    Steps:
        1. Locate the "2020必看热片" section on the index page.
        2. Extract the detail-page links from that section.
        3. Request each detail page and print its title and download link.
    """
    child_urls = extract_child_urls(fetch_page(BASE_URL), BASE_URL)
    if not child_urls:
        raise SystemExit('No links found under the "2020必看热片" section; '
                         'the page layout may have changed.')

    for child_url in child_urls:
        for title, download in extract_downloads(fetch_page(child_url)):
            print(title)
            print(download)


if __name__ == '__main__':
    main()
